library(fixest) # to demean variables
library(multiwayvcov) # to cluster SEs
library(sandwich) # to get HC2 SEs
library(stargazer) # to generate tables
library(mvtnorm) # to simulate betas and vcov matrix

rm(list = ls())

# load data
load(file = 'chDataParty.RData')

# Put each dataset in a different object
ch_reformOri <- ch_list[[1]]
ch_refTermOri <- ch_list[[2]]
ch_pre_postOri <- ch_list[[3]]
ch_refTermMChangesOri <- ch_list[[4]]

# "Gonzalo Fuenzalida" and 'Javier Macaya' changed district after the reform
ch_reformOri <- subset(ch_reformOri, !(bill_author %in% c("Gonzalo Fuenzalida", 'Javier Macaya')))
ch_refTermOri <- subset(ch_refTermOri, !(bill_author %in% c("Gonzalo Fuenzalida", 'Javier Macaya')))
ch_pre_postOri <- subset(ch_pre_postOri, !(bill_author %in% c("Gonzalo Fuenzalida", 'Javier Macaya')))
ch_refTermMChangesOri <- subset(ch_refTermMChangesOri, !(bill_author %in% c("Gonzalo Fuenzalida", 'Javier Macaya')))

partyOut <- function(partyout){
	# Keep specific parties (PDC, PPD, PS, RN, UDI)
	ch_reform <- subset(ch_reformOri, party != partyout)
	ch_refTerm <- subset(ch_refTermOri, party != partyout)
	ch_pre_post <- subset(ch_pre_postOri, party != partyout)
	ch_refTermMChanges <- subset(ch_refTermMChangesOri, party != partyout)

	################################# 
	####### Models using M ##########
	#################################

	# M1: G*M C1,C2,C3 (all data)
	data_m1 <- fixest::demean(women_pct + magAnalysis + female + magAnalysisWoman
		~ session, data = ch_refTerm)
	data_m1 <- cbind(data_m1, bill_author = ch_refTerm$bill_author)
	m1 <- lm(women_pct ~ magAnalysis + female + magAnalysisWoman - 1, data = data_m1)
	vcov1 <- cluster.vcov(m1, cluster = data_m1$bill_author)
	s1  <- sqrt(diag(vcov1))

	# M2: G*M C1,C2,C3 (M changes at the middle of C2)
	data_m2 <- fixest::demean(women_pct + magAnalysis + female + magAnalysisWoman
		~ session + reform, data = ch_refTermMChanges)
	data_m2 <- cbind(data_m2, bill_author = ch_refTermMChanges$bill_author)
	m2 <- lm(women_pct ~ magAnalysis + female + magAnalysisWoman - 1, data = data_m2)
	vcov2 <- cluster.vcov(m2, cluster = data_m2$bill_author)
	s2  <- sqrt(diag(vcov2))


	# M3: G*M C1,C3
	data_m3 <- fixest::demean(women_pct + magAnalysis + female + magAnalysisWoman
		~ session, data = subset(ch_refTerm, session %in% c('2010-2014', '2018-2022')))
	data_m3 <- cbind(data_m3, bill_author = subset(ch_refTerm, session %in% c('2010-2014', '2018-2022'))$bill_author)
	m3 <- lm(women_pct ~ magAnalysis + female + magAnalysisWoman - 1, data = data_m3)
	vcov3 <- cluster.vcov(m3, cluster = data_m3$bill_author)
	s3  <- sqrt(diag(vcov3))

	# M4: C2
	inSample <- names(table(ch_pre_post$bill_author)[table(ch_pre_post$bill_author) == 2])
	ch_pre_post <- subset(ch_pre_post, bill_author %in% inSample)
	data_m4 <- fixest::demean(women_pct + magAnalysis + female + magAnalysisWoman
		~ bill_author, data = ch_pre_post)
	data_m4 <- cbind(data_m4, bill_author = ch_pre_post$bill_author)
	m4 <- lm(women_pct ~ magAnalysis + magAnalysisWoman - 1, data = data_m4)
	vcov4 <- cluster.vcov(m4, cluster = data_m4$bill_author)
	s4  <- sqrt(diag(vcov4))

	# M5: G*M C1,C2,C3 (only repeated legislators)
	data_m5 <- fixest::demean(women_pct + magAnalysis + magAnalysisWoman
		~ bill_author + session, data = subset(ch_refTerm, allSessions == 1))
	data_m5 <- cbind(data_m5, bill_author = subset(ch_refTerm, allSessions == 1)$bill_author)
	m5 <- lm(women_pct ~ magAnalysis + magAnalysisWoman - 1, data = data_m5)
	vcov5 <- cluster.vcov(m5, cluster = data_m5$bill_author)
	s5  <- sqrt(diag(vcov5))

	# M6: G*M C1, C2, C3 (M changes at the middle of C2) (only repeated legislators)
	data_m6 <- fixest::demean(women_pct + magAnalysis + magAnalysisWoman
		~ session + reform + bill_author, data = subset(ch_refTermMChanges, allSessions == 1))
	data_m6 <- cbind(data_m6, bill_author = subset(ch_refTermMChanges, allSessions == 1)$bill_author)
	m6 <- lm(women_pct ~ magAnalysis + magAnalysisWoman - 1, data = data_m6)
	vcov6 <- cluster.vcov(m6, cluster = data_m6$bill_author)
	s6  <- sqrt(diag(vcov6))

	# M7: G*M C1,C3 (only repeated legislators)
	data_m7 <- fixest::demean(women_pct + magAnalysis + magAnalysisWoman
		~ bill_author + session, data = subset(ch_refTerm, session %in% c('2010-2014', '2018-2022') & l10_l18 == 1))
	data_m7 <- cbind(data_m7, bill_author = subset(ch_refTerm, session %in% c('2010-2014', '2018-2022') & l10_l18 == 1)$bill_author)
	m7 <- lm(women_pct ~ magAnalysis + magAnalysisWoman - 1, data = data_m7)
	vcov7 <- cluster.vcov(m7, cluster = data_m7$bill_author)
	s7  <- sqrt(diag(vcov7))

	# M8: C3
	data_m8 <- subset(ch_refTerm, session == '2018-2022')
	m8 <- lm(women_pct ~ magAnalysis + female + magAnalysisWoman, data = data_m8)
	vcov8 <- vcovHC(m8, 'HC2')
	s8  <- sqrt(diag(vcov8))

	### Substantive Effect
	## Scenarios: Women, M = 2
	W2 <- data.frame(mag = 2, female = 1, magAnalysisWoman = 2 * 1)
	## Scenarios: Women, M at mean (post-reform)
	Wmin <- data.frame(mag = min(data_m8$magAnalysis), female = 1, magAnalysisWoman = min(data_m8$magAnalysis) * 1)
	## Scenarios: Women, M at mean (post-reform)
	Wmean <- data.frame(mag = mean(data_m8$magAnalysis), female = 1, magAnalysisWoman = mean(data_m8$magAnalysis) * 1)
	## Scenarios: Women, M at max (post-reform)
	Wmax <- data.frame(mag = max(data_m8$magAnalysis), female = 1, magAnalysisWoman = max(data_m8$magAnalysis) * 1)
	## Scenarios: Men, M = 2
	M2 <- data.frame(mag = 2, female = 0, magAnalysisWoman = 2 * 0)
	## Scenarios: Men, M at mean (post-reform)
	Mmin <- data.frame(mag = min(data_m8$magAnalysis), female = 0, magAnalysisWoman = min(data_m8$magAnalysis) * 0)
	## Scenarios: Men, M at mean (post-reform)
	Mmean <- data.frame(mag = mean(data_m8$magAnalysis), female = 0, magAnalysisWoman = mean(data_m8$magAnalysis) * 0)
	## Scenarios: Men, M at max (post-reform)
	Mmax <- data.frame(mag = max(data_m8$magAnalysis), female = 0, magAnalysisWoman = max(data_m8$magAnalysis) * 0)
	# Women and Men Data
	Wdata <- rbind(W2, Wmin, Wmax)
	Mdata <- rbind(M2, Mmin, Mmax)

	## Sims
	set.seed(123)
	sims1 <- rmvnorm(n = 1000, mean = coef(m1), sigma = vcov1)
	sims2 <- rmvnorm(n = 1000, mean = coef(m2), sigma = vcov2)
	sims3 <- rmvnorm(n = 1000, mean = coef(m3), sigma = vcov3)
	sims4 <- rmvnorm(n = 1000, mean = coef(m4), sigma = vcov4)
	sims5 <- rmvnorm(n = 1000, mean = coef(m5), sigma = vcov5)
	sims6 <- rmvnorm(n = 1000, mean = coef(m6), sigma = vcov6)
	sims7 <- rmvnorm(n = 1000, mean = coef(m7), sigma = vcov7)
	sims8 <- rmvnorm(n = 1000, mean = coef(m8), sigma = vcov8)

	## Calculate Predicted Values
	# Women
	pred1_W <- t(apply((sims1 %*% t(Wdata[2,])) - (sims1 %*% t(Wdata[1,])), 2, quantile, c(0.025,0.05, 0.5, 0.95, 0.975)))
	pred2_W <- t(apply((sims2 %*% t(Wdata[2,])) - (sims2 %*% t(Wdata[1,])), 2, quantile, c(0.025,0.05, 0.5, 0.95, 0.975)))
	pred3_W <- t(apply((sims3 %*% t(Wdata[2,])) - (sims3 %*% t(Wdata[1,])), 2, quantile, c(0.025,0.05, 0.5, 0.95, 0.975)))
	pred4_W <- t(apply((sims4 %*% t(Wdata[2, -2])) - (sims4 %*% t(Wdata[1, -2])), 2, quantile, c(0.025,0.05, 0.5, 0.95, 0.975)))
	pred5_W <- t(apply((sims5 %*% t(Wdata[2, -2])) - (sims5 %*% t(Wdata[1, -2])), 2, quantile, c(0.025,0.05, 0.5, 0.95, 0.975)))
	pred6_W <- t(apply((sims6 %*% t(Wdata[2, -2])) - (sims6 %*% t(Wdata[1, -2])), 2, quantile, c(0.025,0.05, 0.5, 0.95, 0.975)))
	pred7_W <- t(apply((sims7 %*% t(Wdata[2, -2])) - (sims7 %*% t(Wdata[1, -2])), 2, quantile, c(0.025,0.05, 0.5, 0.95, 0.975)))
	pred8_W <- t(apply((sims8 %*% t(cbind(1, Wdata)[2,])) - (sims8 %*% t(cbind(1, Wdata)[1,])), 2, quantile, c(0.025,0.05, 0.5, 0.95, 0.975)))

	# Men
	pred1_M <- t(apply((sims1 %*% t(Mdata[2,])) - (sims1 %*% t(Mdata[1,])), 2, quantile, c(0.025,0.05, 0.5, 0.95, 0.975)))
	pred2_M <- t(apply((sims2 %*% t(Mdata[2,])) - (sims2 %*% t(Mdata[1,])), 2, quantile, c(0.025,0.05, 0.5, 0.95, 0.975)))
	pred3_M <- t(apply((sims3 %*% t(Mdata[2,])) - (sims3 %*% t(Mdata[1,])), 2, quantile, c(0.025,0.05, 0.5, 0.95, 0.975)))
	pred4_M <- t(apply((sims4 %*% t(Mdata[2, -2])) - (sims4 %*% t(Mdata[1, -2])), 2, quantile, c(0.025,0.05, 0.5, 0.95, 0.975)))
	pred5_M <- t(apply((sims5 %*% t(Mdata[2, -2])) - (sims5 %*% t(Mdata[1, -2])), 2, quantile, c(0.025,0.05, 0.5, 0.95, 0.975)))
	pred6_M <- t(apply((sims6 %*% t(Mdata[2, -2])) - (sims6 %*% t(Mdata[1, -2])), 2, quantile, c(0.025,0.05, 0.5, 0.95, 0.975)))
	pred7_M <- t(apply((sims7 %*% t(Mdata[2, -2])) - (sims7 %*% t(Mdata[1, -2])), 2, quantile, c(0.025,0.05, 0.5, 0.95, 0.975)))
	pred8_M <- t(apply((sims8 %*% t(cbind(1, Mdata)[2,])) - (sims8 %*% t(cbind(1, Mdata)[1,])), 2, quantile, c(0.025,0.05, 0.5, 0.95, 0.975)))

	out <- list()
	out[['women']] <- rbind(pred1_W, pred2_W, pred3_W, pred4_W, pred5_W, pred6_W, pred7_W, pred8_W)
	out[['men']] <- rbind(pred1_M, pred2_M, pred3_M, pred4_M, pred5_M, pred6_M, pred7_M, pred8_M)

	return(out)
}

# Models removing one party each time
EVOP <- partyOut(partyout = 'EVOP')
FREVS <- partyOut(partyout = 'FREVS')
IC <- partyOut(partyout = 'IC')
Ind <- partyOut(partyout = 'Ind')
PCCh <- partyOut(partyout = 'PCCh')
PDC <- partyOut(partyout = 'PDC')
PEV <- partyOut(partyout = 'PEV')
PH <- partyOut(partyout = 'PH')
PI <- partyOut(partyout = 'PI')
PL <- partyOut(partyout = 'PL')
Poder <- partyOut(partyout = 'Poder')
PPD <- partyOut(partyout = 'PPD')
PRI <- partyOut(partyout = 'PRI')
PRO <- partyOut(partyout = 'PRO')
PRSD <- partyOut(partyout = 'PRSD')
PS <- partyOut(partyout = 'PS')
RD <- partyOut(partyout = 'RD')
RN <- partyOut(partyout = 'RN')
UDI <- partyOut(partyout = 'UDI')

plotParty <- function(results, gender = 'women'){
	party_name <- deparse(substitute(results))
	par(mar = c(5, 5, 4, 4))
	plot(x = results[[gender]][, 3], y = 1:8, pch = 19, 
		xlim = c(-1, 4), axes = FALSE, xlab = "Predicted change in portfolio on women's issues", cex.lab = 1.75,
		ylab = 'Model', main = paste('Removing', party_name), cex = 1.5, cex.main = 2)
	axis(1, cex.axis = 1.75)
	axis(2, cex.axis = 1.75, las = 2)
	sapply(1:8, function (i) lines(x = c(results[[gender]][i, 1], results[[gender]][i, 5]), y = c(i, i)))
	sapply(1:8, function (i) lines(x = c(results[[gender]][i, 2], results[[gender]][i, 4]), y = c(i, i), lwd = 3))
	abline(v = 0, lty = 2, lwd = 0.5)
	
}
plotParty(EVOP)
plotParty(FREVS)
plotParty(IC)
plotParty(Ind)
plotParty(PCCh)
plotParty(PDC)
plotParty(PEV)
plotParty(PH)
plotParty(PI)
plotParty(PL)
plotParty(Poder)
plotParty(PPD)
plotParty(PRI)
plotParty(PRO)
plotParty(PRSD)
plotParty(PS)
plotParty(RD)
plotParty(RN)
plotParty(UDI)


plotParty(EVOP, gender = 'men')
plotParty(FREVS, gender = 'men')
plotParty(IC, gender = 'men')
plotParty(Ind, gender = 'men')
plotParty(PCCh, gender = 'men')
plotParty(PDC, gender = 'men')
plotParty(PEV, gender = 'men')
plotParty(PH, gender = 'men')
plotParty(PI, gender = 'men')
plotParty(PL, gender = 'men')
plotParty(Poder, gender = 'men')
plotParty(PPD, gender = 'men')
plotParty(PRI, gender = 'men')
plotParty(PRO, gender = 'men')
plotParty(PRSD, gender = 'men')
plotParty(PS, gender = 'men')
plotParty(RD, gender = 'men')
plotParty(RN, gender = 'men')
plotParty(UDI, gender = 'men')

# Count the number of significant results
table(c(EVOP[['women']][, 2] > 0, FREVS[['women']][, 2] > 0, IC[['women']][, 2] > 0,
Ind[['women']][, 2] > 0, PCCh[['women']][, 2] > 0, PDC[['women']][, 2] > 0,
PEV[['women']][, 2] > 0, PH[['women']][, 2] > 0,  PI[['women']][, 2] > 0,
PL[['women']][, 2] > 0, Poder[['women']][, 2] > 0, PPD[['women']][, 2] > 0,
PRI[['women']][, 2] > 0, PRO[['women']][, 2] > 0, PRSD[['women']][, 2] > 0,
PS[['women']][, 2] > 0, RD[['women']][, 2] > 0, RN[['women']][, 2] > 0,
UDI[['women']][, 2] > 0))

table(c(EVOP[['men']][, 2] > 0, FREVS[['men']][, 2] > 0, IC[['men']][, 2] > 0,
Ind[['men']][, 2] > 0, PCCh[['men']][, 2] > 0, PDC[['men']][, 2] > 0,
PEV[['men']][, 2] > 0, PH[['men']][, 2] > 0,  PI[['men']][, 2] > 0,
PL[['men']][, 2] > 0, Poder[['men']][, 2] > 0, PPD[['men']][, 2] > 0,
PRI[['men']][, 2] > 0, PRO[['men']][, 2] > 0, PRSD[['men']][, 2] > 0,
PS[['men']][, 2] > 0, RD[['men']][, 2] > 0, RN[['men']][, 2] > 0,
UDI[['men']][, 2] > 0))